############ Make a data set that will be used for the analysis #################
library(maps)

### If using fake data
## dir='/Users/coryzigler/Dropbox/Research/HEI Accountability Grant/PM10 Accountability/Data/'

### If using real data
## dir='/Users/coryzigler/Data/Medicare/Linked AQS-Medicare/HEI PM10 Accountability/'

## Read in the data file that has the imputed values of baseline PM and temperature
load(paste(dir, 'pm10dat_withbaselineimp.RData', sep=""))
datorig = dat

with(datorig, table(is.na(Tot_Beneficiary.2001)))
with(datorig, table(Tot_Beneficiary.2001 >= 20))


### Look at Montioring sites that have missing Medicare
#map("state")
#with(datorig, points(Longitude, Latitude, pch = 16, col = 1+is.na(Tot_Beneficiary.2001)))
#mismedids = datorig[is.na(datorig$Tot_Beneficiary.2001), c("Monitor", "Longitude", "Latitude")]
#write.table(mismedids, file = "MissingMedicareMonitors.csv", sep = ",")

dat = subset(datorig, Tot_Beneficiary.2001>=20) ## 547 locations

map("state", xlim=c(-125,-100), ylim=c(25,50))
with(dat, points(Longitude, Latitude , col=a+1, pch=16))
dim(dat)
with(dat, table(a))


############################################################################
######              Select Variables for Analysis                    #######
############################################################################

medyears = 1999:2010
aqsyears = 1990:2014
dat = dat[, names(dat) %in% c("Monitor", "FIPS", "State.Name", "County.Name", "a", "Median_income", "HS_rate", "Urban_rate", "Migration_5_year_rate",
                              "Hispanic_cens", "White_cens", "Black_cens", "Other_cens", "Current_Smoking_rate_weighted", "HOUSEUNIT", "POPULATION",
                              "Female_cens", paste("mean_age", medyears, sep="."), paste("Female_rate", medyears, sep="."), 
                              paste("White_rate", medyears, sep="."), paste("Black_rate", medyears, sep="."), 
                              paste("Total_death", medyears, sep="."), paste("Tot_Beneficiary", medyears, sep="."), paste("pyear", medyears, sep="."),
                              paste("ALL_CVD", medyears, sep="."), paste("AMI", medyears, sep="."), paste("COPD", medyears, sep="."),
                              paste("CV_stroke", medyears, sep="."), paste("HF", medyears, sep="."), paste("HRD", medyears, sep="."),
                              paste("IHD", medyears, sep="."), paste("PVD", medyears, sep="."), paste("RTI", medyears, sep="."), 
                              paste("HRP", medyears, sep="."), paste("Respiratory", medyears, sep="."),
                              paste("Arithmetic.Mean", aqsyears, sep="."), paste("X98th.Percentile", aqsyears, sep="."), 
                              paste("X95th.Percentile", aqsyears, sep="."), paste("X90th.Percentile", aqsyears, sep="."),
                              "Longitude", "Latitude", "pm10base1990", "baseorig1990", "pm10base1990_1994", "baseorig1990_1994", "pm10fu", "pm10fu95",
                              "temperature", "temporig", "pm10baseorig", "housedens", "loginc", "logpop", "lhousedens")]


save(dat, file=paste(dir, 'pm10analysis.RData', sep=""))
